{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3cb34437",
   "metadata": {},
   "source": [
    "# Finding patterns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "06fdaede",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Execute the shared helper notebooks inside this kernel's namespace (-i),\n",
    "# so the names they define are available to the cells below.\n",
    "# NOTE(review): presumably the source of `read_text_file` and `small_model` - confirm.\n",
    "%run -i \"../util/file_utils.ipynb\"\n",
    "%run -i \"../util/lang_utils.ipynb\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "194a3671",
   "metadata": {},
   "outputs": [],
   "source": [
    "from spacy.matcher import Matcher\n",
    "# Build a rule-based matcher on the vocabulary of `small_model`.\n",
    "# NOTE(review): `small_model` is not defined in this notebook; presumably it is the\n",
    "# spaCy pipeline created by ../util/lang_utils.ipynb - confirm.\n",
    "matcher = Matcher(small_model.vocab)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cbddd738",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each POS-tag sequence below becomes one token-level Matcher pattern.\n",
    "pos_sequences = [\n",
    "    (\"VERB\",),\n",
    "    (\"AUX\", \"VERB\"),\n",
    "    (\"AUX\", \"ADJ\"),\n",
    "    (\"AUX\", \"VERB\", \"ADP\"),\n",
    "]\n",
    "patterns = [[{\"POS\": tag} for tag in sequence] for sequence in pos_sequences]\n",
    "# Register every pattern under the single match key \"Verb\".\n",
    "matcher.add(\"Verb\", patterns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "b8b17880",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the sample text and run it through `small_model` to get the `doc` that is matched below.\n",
    "# NOTE(review): `read_text_file` comes from outside this notebook; presumably it returns\n",
    "# the file contents as a single string - confirm.\n",
    "sherlock_holmes_part_of_text = read_text_file(\"../data/sherlock_holmes_1.txt\")\n",
    "doc = small_model(sherlock_holmes_part_of_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "d0b32123",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14677086776663181681 Verb 14 15 heard\n",
      "14677086776663181681 Verb 17 18 mention\n",
      "14677086776663181681 Verb 28 29 eclipses\n",
      "14677086776663181681 Verb 31 32 predominates\n",
      "14677086776663181681 Verb 43 44 felt\n",
      "14677086776663181681 Verb 49 50 love\n",
      "14677086776663181681 Verb 63 65 were abhorrent\n",
      "14677086776663181681 Verb 80 81 take\n",
      "14677086776663181681 Verb 88 89 observing\n",
      "14677086776663181681 Verb 94 96 has seen\n",
      "14677086776663181681 Verb 95 96 seen\n",
      "14677086776663181681 Verb 103 105 have placed\n",
      "14677086776663181681 Verb 104 105 placed\n",
      "14677086776663181681 Verb 114 115 spoke\n",
      "14677086776663181681 Verb 120 121 save\n",
      "14677086776663181681 Verb 130 132 were admirable\n",
      "14677086776663181681 Verb 140 141 drawing\n",
      "14677086776663181681 Verb 153 154 trained\n",
      "14677086776663181681 Verb 157 158 admit\n",
      "14677086776663181681 Verb 167 168 adjusted\n",
      "14677086776663181681 Verb 171 172 introduce\n",
      "14677086776663181681 Verb 173 174 distracting\n",
      "14677086776663181681 Verb 178 179 throw\n",
      "14677086776663181681 Verb 228 229 was\n"
     ]
    }
   ],
   "source": [
    "# Apply the registered patterns to the parsed text.\n",
    "matches = matcher(doc)\n",
    "# Each match is a (key hash, start token index, end token index) triple.\n",
    "for key_hash, first, last in matches:\n",
    "    # Look the hash up in the vocabulary's string store to recover the key name.\n",
    "    label = small_model.vocab.strings[key_hash]\n",
    "    print(key_hash, label, first, last, doc[first:last].text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "149ffb5f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
